Commit

add plot_ccf
tvdboom committed Jan 30, 2024
1 parent 9cbe039 commit f96b802
Showing 20 changed files with 635 additions and 43 deletions.
20 changes: 19 additions & 1 deletion README.md
@@ -90,6 +90,24 @@ Example steps taken by ATOM's pipeline:

<br><br>

❗ Why you should use ATOM
-------------------------

* [Multiple data cleaning and feature engineering classes](https://tvdboom.github.io/ATOM/latest/user_guide/data_cleaning/)
* [55+ classification, regression and forecast models](https://tvdboom.github.io/ATOM/latest/user_guide/models/) to choose from
* [Possibility to train multiple models with one line of code](https://tvdboom.github.io/ATOM/latest/getting_started/#usage)
* [Fast implementation of hyperparameter tuning](https://tvdboom.github.io/ATOM/latest/user_guide/training/#hyperparameter-tuning)
* [Easy way to compare the results from different models](https://tvdboom.github.io/ATOM/latest/user_guide/training/)
* [50+ plots to analyze the data and model performance](https://tvdboom.github.io/ATOM/latest/user_guide/plots/#available-plots)
* [Avoid refactoring to test new pipelines](https://tvdboom.github.io/ATOM/latest/user_guide/data_management/#branches)
* [Native support for GPU training](https://tvdboom.github.io/ATOM/latest/user_guide/accelerating/#gpu-acceleration)
* [25+ example notebooks to get you started](https://tvdboom.github.io/ATOM/latest/examples/accelerating_cuml/)
* [Full integration with multilabel and multioutput datasets](https://tvdboom.github.io/ATOM/latest/user_guide/data_management/#multioutput-tasks)
* [Native support for sparse datasets](https://tvdboom.github.io/ATOM/latest/user_guide/data_management/#sparse-datasets)
* [Built-in transformers for NLP pipelines](https://tvdboom.github.io/ATOM/latest/user_guide/nlp/)
* [Avoid endless imports and documentation lookups](https://tvdboom.github.io/ATOM/latest/getting_started/#usage) (see the usage sketch below)
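
To make these bullets concrete, here is a minimal usage sketch modeled on ATOM's getting-started guide (the dataset and argument values are illustrative assumptions, not part of this commit):

```python
from atom import ATOMClassifier
from sklearn.datasets import load_breast_cancer

# Load a demo dataset; ATOM handles the train/test split internally
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

atom = ATOMClassifier(X, y, test_size=0.2, verbose=2)
atom.impute(strat_num="median")              # one of the data cleaning steps
atom.run(models=["LR", "RF"], metric="auc")  # train two models in one call
atom.plot_roc()                              # one of the 50+ available plots
```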

<br><br>

🛠️ Installation
---------------
@@ -183,10 +201,10 @@ atom.plot_lift()
--- | ---
**[About](https://tvdboom.github.io/ATOM/latest/release_history/)** | Learn more about the package.
🚀 **[Getting started](https://tvdboom.github.io/ATOM/latest/getting_started/)** | New to ATOM? Here's how to get you started!
-📢 **[Release history](https://tvdboom.github.io/ATOM/latest/release_history/)** | What are the new features of the latest release?
👨‍💻 **[User guide](https://tvdboom.github.io/ATOM/latest/user_guide/introduction/)** | How to use ATOM and its features.
🎛️ **[API Reference](https://tvdboom.github.io/ATOM/latest/API/ATOM/atomclassifier/)** | The detailed reference for ATOM's API.
📋 **[Examples](https://tvdboom.github.io/ATOM/latest/examples/binary_classification/)** | Example notebooks show you what can be done and how.
+📢 **[Changelog](https://tvdboom.github.io/ATOM/latest/changelog/)** | What are the new features in the latest release?
**[FAQ](https://tvdboom.github.io/ATOM/latest/faq/)** | Get answers to frequently asked questions.
🔧 **[Contributing](https://tvdboom.github.io/ATOM/latest/contributing/)** | Do you want to contribute to the project? Read this before creating a PR.
🌳 **[Dependencies](https://tvdboom.github.io/ATOM/latest/dependencies/)** | Which other packages does ATOM depend on?
10 changes: 5 additions & 5 deletions atom/atom.py
@@ -1261,12 +1261,12 @@ def _add_transformer(

        # Memoize the fitted transformer_c for repeated instantiations of atom
        fit = self._memory.cache(fit_one)
-        kwargs = dict(
-            estimator=transformer_c,
-            X=self.X_train,
-            y=self.y_train,
+        kwargs = {
+            "estimator": transformer_c,
+            "X": self.X_train,
+            "y": self.y_train,
            **fit_params,
-        )
+        }

        # Check if the fitted estimator is retrieved from cache to inform
        # the user, else user might notice the lack of printed messages
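The `dict(...)` → `{...}` change in this file and the next two (baserunner.py, basetrainer.py) appears stylistic (the literal form is what linters such as flake8-comprehensions' C408 rule suggest); both spellings accept an unpacked mapping and produce the same dict. A minimal sketch of the equivalence, with illustrative values:

```python
fit_params = {"sample_weight": None}

a = dict(estimator="clf", X="X_train", y="y_train", **fit_params)
b = {"estimator": "clf", "X": "X_train", "y": "y_train", **fit_params}

assert a == b  # identical mappings; the literal just avoids the dict() call
```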
12 changes: 6 additions & 6 deletions atom/baserunner.py
@@ -1431,13 +1431,13 @@ def stacking(
"train multiple Stacking models within the same instance."
)

kw_model = dict(
goal=self._goal,
config=self._config,
branches=self._branches,
metric=self._metric,
kw_model = {
"goal": self._goal,
"config": self._config,
"branches": self._branches,
"metric": self._metric,
**{attr: getattr(self, attr) for attr in BaseTransformer.attrs},
)
}

# The parameter name is different in sklearn and sktime
regressor = "regressor" if self.task.is_forecast else "final_estimator"
12 changes: 6 additions & 6 deletions atom/basetrainer.py
@@ -154,13 +154,13 @@ def _prepare_parameters(self):

        # Define models ============================================ >>

-        kwargs = dict(
-            goal=self._goal,
-            config=self._config,
-            branches=self._branches,
-            metric=self._metric,
+        kwargs = {
+            "goal": self._goal,
+            "config": self._config,
+            "branches": self._branches,
+            "metric": self._metric,
            **{attr: getattr(self, attr) for attr in BaseTransformer.attrs},
-        )
+        }

        inc = []
        exc = []
2 changes: 1 addition & 1 deletion atom/basetransformer.py
@@ -390,7 +390,7 @@ def _inherit(self, obj: T_Estimator, fixed: tuple[str, ...] = ()) -> T_Estimator
        for p in obj.get_params():
            if p in fixed:
                continue
-            elif match := re.search("(n_jobs|random_state)$|__\1$", p):
+            elif match := re.search("^(n_jobs|random_state)$|__\1$", p):
                obj.set_params(**{p: getattr(self, match.group())})
            elif re.search(r"^sp$|__sp$", p) and hasattr(self, "_config") and self._config.sp:
                if self.multiple_seasonality:
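The fix here is the added `^` anchor: without it, the first alternative matches any parameter that merely ends in `n_jobs` or `random_state`. A small illustration (the second alternative is spelled out below instead of using the `\1` backreference, purely for readability):

```python
import re

old = r"(n_jobs|random_state)$"                            # unanchored
new = r"^(n_jobs|random_state)$|__(n_jobs|random_state)$"  # anchored + nested form

print(bool(re.search(old, "my_n_jobs")))          # True: unwanted suffix match
print(bool(re.search(new, "my_n_jobs")))          # False
print(bool(re.search(new, "n_jobs")))             # True: exact name
print(bool(re.search(new, "estimator__n_jobs")))  # True: nested parameter
```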
3 changes: 2 additions & 1 deletion atom/branch/branch.py
@@ -571,7 +571,8 @@ def _get_columns(
            select from.

        only_numerical: bool, default=False
-            Whether to select only numerical columns.
+            Whether to select only numerical columns when
+            `columns=None`.

        Returns
        -------
2 changes: 1 addition & 1 deletion atom/data_cleaning.py
@@ -773,7 +773,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None) -> Self:
self._log("Fitting Cleaner...", 1)

if X is not None and self.drop_dtypes is not None:
self._drop_cols = list(X.select_dtypes(include=list(self.drop_dtypes)).columns)
self._drop_cols = list(X.select_dtypes(include=lst(self.drop_dtypes)).columns)

Check notice on line 776 in atom/data_cleaning.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

An instance attribute is defined outside `__init__`

Instance attribute _drop_cols defined outside __init__

if y is not None:
if isinstance(y, series_t):
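The one-character change from `list` to `lst` matters because `drop_dtypes` can be a single string: `list()` iterates it into characters, while ATOM's `lst` utility (behavior assumed in the sketch below) wraps scalars in a list instead:

```python
# Built-in list() explodes a lone dtype string into characters
print(list("number"))  # ['n', 'u', 'm', 'b', 'e', 'r']

def lst(x):
    """Assumed behavior of atom.utils' lst: return x as a list."""
    return list(x) if isinstance(x, (list, tuple)) else [x]

print(lst("number"))              # ['number']
print(lst(["number", "object"]))  # ['number', 'object']
```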
40 changes: 32 additions & 8 deletions atom/models/ensembles.py
@@ -59,11 +59,23 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
            Estimator instance.

        """
+        # We use _est_class with get_params instead of just a dict
+        # to also fix the parameters of the models in the ensemble
+        estimator = self._est_class(
+            **{
+                "estimators" if not self.task.is_forecast else "forecasters": [
+                    (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
+                    for m in self._models
+                ]
+            }
+        )
+
+        # Drop the model names from params since those
+        # are not direct parameters of the ensemble
        default = {
-            "estimators" if not self.task.is_forecast else "forecasters": [
-                (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
-                for m in self._models
-            ]
+            k: v
+            for k, v in estimator.get_params().items()
+            if k not in (m.name for m in self._models)
        }

        return super()._get_est(default | params)
@@ -115,11 +127,23 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
            Estimator instance.

        """
+        # We use _est_class with get_params instead of just a dict
+        # to also fix the parameters of the models in the ensemble
+        estimator = self._est_class(
+            **{
+                "estimators" if not self.task.is_forecast else "forecasters": [
+                    (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
+                    for m in self._models
+                ]
+            }
+        )
+
+        # Drop the model names from params since those
+        # are not direct parameters of the ensemble
        default = {
-            "estimators" if not self.task.is_forecast else "forecasters": [
-                (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
-                for m in self._models
-            ]
+            k: v
+            for k, v in estimator.get_params().items()
+            if k not in (m.name for m in self._models)
        }

        return super()._get_est(default | params)
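
For context on why the new code round-trips through `get_params()`: in a sklearn-style ensemble, `get_params()` lists the named sub-estimators alongside the ensemble's own constructor parameters, so the model names have to be filtered back out before merging with `params`. A small sklearn illustration:

```python
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

vote = VotingClassifier(
    estimators=[("LR", LogisticRegression()), ("DT", DecisionTreeClassifier())]
)

params = vote.get_params()
print("LR" in params)      # True: a named sub-estimator, not a constructor argument
print("voting" in params)  # True: a genuine VotingClassifier parameter
```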
52 changes: 51 additions & 1 deletion atom/models/ts.py
@@ -344,7 +344,7 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
            Estimator instance.

        """
-        return super()._get_est({"auto": True} | params)
+        return super()._get_est({"sp": self._config.sp.sp or 1, "auto": True} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
@@ -900,6 +900,22 @@ class NaiveForecaster(BaseModel):
"forecast": "sktime.forecasting.naive.NaiveForecaster"
}

def _get_est(self, params: dict[str, Any]) -> Predictor:
"""Get the model's estimator with unpacked parameters.
Parameters
----------
params: dict
Hyperparameters for the estimator.
Returns
-------
Predictor
Estimator instance.
"""
return super()._get_est({"sp": self._config.sp.sp or 1} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
        """Get the predefined hyperparameter distributions.
@@ -1258,6 +1274,24 @@ class STL(BaseModel):
"forecast": "sktime.forecasting.trend.STLForecaster"
}

def _get_est(self, params: dict[str, Any]) -> Predictor:
"""Get the model's estimator with unpacked parameters.
Parameters
----------
params: dict
Hyperparameters for the estimator.
Returns
-------
Predictor
Estimator instance.
"""
# Parameter sp must be provided to STL and >=2
# None is only accepted if y has freq in index but sktime passes array
return super()._get_est({"sp": self._config.sp.sp or 2} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
        """Get the predefined hyperparameter distributions.
@@ -1420,6 +1454,22 @@ class Theta(BaseModel):
"forecast": "sktime.forecasting.theta.ThetaForecaster"
}

def _get_est(self, params: dict[str, Any]) -> Predictor:
"""Get the model's estimator with unpacked parameters.
Parameters
----------
params: dict
Hyperparameters for the estimator.
Returns
-------
Predictor
Estimator instance.
"""
return super()._get_est({"sp": self._config.sp.sp or 1} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
        """Get the predefined hyperparameter distributions.
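For context on the `sp` fallbacks added above: sktime forecasters take the seasonal period as `sp`, and `STLForecaster` rejects `sp < 2` (a seasonal decomposition needs at least two observations per cycle), which is presumably why STL falls back to 2 where the other models fall back to 1. A minimal sktime sketch:

```python
import pandas as pd
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import STLForecaster

# Three years of monthly data
y = pd.Series(range(36), index=pd.period_range("2020-01", periods=36, freq="M"))

# sp=1 means "no seasonality" and is a valid fallback for most forecasters
NaiveForecaster(strategy="last", sp=1).fit(y)

# STL decomposes into trend/seasonal/residual, so it needs sp >= 2;
# sp=2 is the smallest value that still defines a seasonal cycle
STLForecaster(sp=2).fit(y)
```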
(The diff for the remaining changed files is not shown.)
